library(tidyverse)
library(plotly)
library(sf)
library(tigris)
library(leaflet)
library(censusapi)
Sys.setenv(CENSUS_KEY="c8aa67e4086b4b5ce3a8717f59faa9a28f611dab")
bay_area_zipcodes <- read_csv("https://data.sfgov.org/api/views/f9wk-m4qb/rows.csv?accessType=DOWNLOAD")
bay_area_zip_final <-
bay_area_zipcodes %>%
select(ZIP)
years <- 2017:2019
quarters <- 1:4
type <- "Electric"
pge_elec <- NULL
for(year in years) {
for(quarter in quarters) {
filename <-
paste0(
"pge/PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
print(filename)
temp <- read_csv(filename)
pge_elec <- rbind(pge_elec,temp)
}}
## [1] "pge/PGE_2017_Q1_ElectricUsageByZip.csv"
## [1] "pge/PGE_2017_Q2_ElectricUsageByZip.csv"
## [1] "pge/PGE_2017_Q3_ElectricUsageByZip.csv"
## [1] "pge/PGE_2017_Q4_ElectricUsageByZip.csv"
## [1] "pge/PGE_2018_Q1_ElectricUsageByZip.csv"
## [1] "pge/PGE_2018_Q2_ElectricUsageByZip.csv"
## [1] "pge/PGE_2018_Q3_ElectricUsageByZip.csv"
## [1] "pge/PGE_2018_Q4_ElectricUsageByZip.csv"
## [1] "pge/PGE_2019_Q1_ElectricUsageByZip.csv"
## [1] "pge/PGE_2019_Q2_ElectricUsageByZip.csv"
## [1] "pge/PGE_2019_Q3_ElectricUsageByZip.csv"
## [1] "pge/PGE_2019_Q4_ElectricUsageByZip.csv"
years <- 2017:2019
quarters <- 1:4
type <- "Gas"
pge_gas <- NULL
for(year in years) {
for(quarter in quarters) {
filename <-
paste0(
"pge/PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
print(filename)
temp <- read_csv(filename)
pge_gas <- rbind(pge_gas,temp)
}}
## [1] "pge/PGE_2017_Q1_GasUsageByZip.csv"
## [1] "pge/PGE_2017_Q2_GasUsageByZip.csv"
## [1] "pge/PGE_2017_Q3_GasUsageByZip.csv"
## [1] "pge/PGE_2017_Q4_GasUsageByZip.csv"
## [1] "pge/PGE_2018_Q1_GasUsageByZip.csv"
## [1] "pge/PGE_2018_Q2_GasUsageByZip.csv"
## [1] "pge/PGE_2018_Q3_GasUsageByZip.csv"
## [1] "pge/PGE_2018_Q4_GasUsageByZip.csv"
## [1] "pge/PGE_2019_Q1_GasUsageByZip.csv"
## [1] "pge/PGE_2019_Q2_GasUsageByZip.csv"
## [1] "pge/PGE_2019_Q3_GasUsageByZip.csv"
## [1] "pge/PGE_2019_Q4_GasUsageByZip.csv"
year <- 2020
quarters <- 1:3
type <- "Electric"
for(quarter in quarters) {
filename <-
paste0(
"pge/PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
print(filename)
temp <- read_csv(filename)
pge_elec <- rbind(pge_elec,temp)
}
## [1] "pge/PGE_2020_Q1_ElectricUsageByZip.csv"
## [1] "pge/PGE_2020_Q2_ElectricUsageByZip.csv"
## [1] "pge/PGE_2020_Q3_ElectricUsageByZip.csv"
year <- 2020
quarters <- 1:3
type <- "Gas"
for(quarter in quarters) {
filename <-
paste0(
"pge/PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
print(filename)
temp <- read_csv(filename)
pge_gas <- rbind(pge_gas,temp)
}
## [1] "pge/PGE_2020_Q1_GasUsageByZip.csv"
## [1] "pge/PGE_2020_Q2_GasUsageByZip.csv"
## [1] "pge/PGE_2020_Q3_GasUsageByZip.csv"
saveRDS(pge_elec, "pge_elec.rds")
saveRDS(pge_gas, "pge_gas.rds")
saveRDS(bay_area_zip_final, "bay_area_zip_final.rds")
save(pge_elec, pge_gas, bay_area_zip_final, file = "pge_datasets.rda")
pge_elec_final <-
pge_elec %>%
mutate(
DATE =
as.Date(
paste(
YEAR,
"-",
MONTH,
"-01",
sep=""
)
)
) %>%
filter(
ZIPCODE %in%
c(bay_area_zip_final$ZIP)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
select(
!c(COMBINED, AVERAGEKWH, YEAR, MONTH)
) %>%
group_by(DATE, CUSTOMERCLASS) %>%
summarize(
TOTALKBTU =
sum(
TOTALKWH*3.412,
na.rm = T
),
TOTALCUSTOMERS =
sum(
TOTALCUSTOMERS,
na.rm = T
)
)%>%
mutate(
TYPE = "ELEC"
)
pge_gas_final <-
pge_gas %>%
mutate(
DATE =
as.Date(
paste(
YEAR,
"-",
MONTH,
"-01",
sep=""
)
)
) %>%
filter(
ZIPCODE %in%
c(bay_area_zip_final$ZIP)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Gas- Residential",
"Gas- Commercial"
)
) %>%
select(
!c(COMBINED, AVERAGETHM, YEAR, MONTH)
) %>%
group_by(DATE, CUSTOMERCLASS) %>%
summarize(
TOTALKBTU =
sum(
TOTALTHM*99.976,
na.rm = T
),
TOTALCUSTOMERS =
sum(
TOTALCUSTOMERS,
na.rm = T
)
)%>%
mutate(
TYPE = "GAS"
)
pge_final <- rbind(pge_gas_final,pge_elec_final)
pge_chart <-
pge_final %>%
ggplot() +
geom_bar(
aes(
x = DATE,
y = TOTALKBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Date",
y = "kBTU",
title = "PG&E Bay Area Monthly Gas & Electricity Usage, 2017-2020",
fill = "Electricity/Gas Type"
)
pge_chart %>%
ggplotly() %>%
layout(
xaxis = list(fixedrange = F),
yaxis = list(fixedrange = F)
) %>%
config(displayModeBar = F)
#It appears that there is a significant season variation, especially in gas usage, which overshadows impacts of the COVID-19 pandemic. Still we can see small differences in the proportion of energy between commercial and residential. Lockdowns began mid-March, meaning that April was the first full month impacted by the pandemic. We can see that energy usage by the commercial section was lower in April 2020 than in previous years, while residential usage remained similar or even slightly higher.
#Electricity consumption varies each year by season. Seasonal changes appear to have a greater impact on total electricity consumption than the pandemic has on the whole. For that reason, to investigate the impact of COVID on different neighborhoods (defined here as different zipcodes based on the data available), I looked at compared electricity usage data from April 2020 with electricity usage in April 2019. Then, I mapped those results (i.e. change in electricity consumption). I assumed that we were still only looking at residential (spefication of neighborhoods not industrial parks/commercial centers).
library(tidyverse)
library(plotly)
library(sf)
library(tigris)
library(leaflet)
library(censusapi)
Sys.setenv(CENSUS_KEY="c8aa67e4086b4b5ce3a8717f59faa9a28f611dab")
years <- 2019
quarters <- 1:4
type <- "Electric"
pge_elec_b <- NULL
for(year in years) {
for(quarter in quarters) {
filename <-
paste0(
"pge/PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
temp <- read_csv(filename)
pge_elec_b <- rbind(pge_elec_b,temp)
}}
year <- 2020
quarters <- 1:3
type <- "Electric"
for(quarter in quarters) {
filename <-
paste0(
"pge/PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
temp <- read_csv(filename)
pge_elec_b <- rbind(pge_elec_b,temp)
}
saveRDS(pge_elec_b, "pge_elec_b.rds")
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
counties("CA", cb = T, progress_bar = F) %>%
filter(NAME %in% bay_county_names)
usa_zips <-
zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
pge_elec_final1b <-
pge_elec_b %>%
filter(CUSTOMERCLASS == "Elec- Residential") %>%
mutate(
ZIPCODE = ZIPCODE %>% as.character()
)%>%
group_by(ZIPCODE, YEAR) %>%
summarize(
TOTALKBTU =
sum(
TOTALKWH*3.412,
na.rm = T
)
) %>%
group_by(ZIPCODE) %>%
mutate(
PCTCHANGEKBTU =
((TOTALKBTU - lag(TOTALKBTU))/lag(TOTALKBTU))*100
)%>%
right_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf()%>%
st_transform(4326)
res_pal <- colorNumeric(
palette = "Blues",
domain =
pge_elec_final1b$PCTCHANGEKBTU
)
leaflet() %>%
addTiles() %>%
addPolygons(
data = pge_elec_final1b,
fillColor = ~res_pal(PCTCHANGEKBTU),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(PCTCHANGEKBTU),
" percent change in total BTU for the zipcode ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = pge_elec_final1b,
pal = res_pal,
values = ~PCTCHANGEKBTU,
title = "Percent Change<br>KBTU, April 2020 compared to April 2019"
)
#In the map we can see that there is a greater percent reduction in residential electricity usage in zipcodes surrounding San Francisco (the city) compared to outlying zipcodes in the Bay Area. This aligns with the logic that people may have migrated away from cities during the pandemic as a result of remote work, reduction in activities allowed in cities, and desire for greater residential space.